(define-module (fiasco finder)
  #:use-module (ice-9 control)
  #:use-module (ice-9 match)
  #:use-module (ice-9 popen)
  #:use-module (ice-9 regex)
  #:use-module (ice-9 textual-ports)
  #:use-module (gnu packages)
  #:use-module (guix base32)
  #:use-module (guix build utils)
  #:use-module (guix download)
  #:use-module ((guix build download)
		#:select (url-fetch)
		#:prefix build:)
  #:use-module (guix download)
  #:use-module (guix packages)
  #:use-module (guix scripts download)
  #:use-module (guix scripts hash)
  #:use-module (guix store)
  #:use-module (guix ui)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-19)

  #:export (result
	    result?
	    result-package-name
	    result-package-version
	    result-guix-hash
	    result-upstream-hash
	    result-hash-ok?
	    result-safe-to-update?
	    result-date
	    result->package

	    results-dir
	    results-file
	    results-file->results
	    results->results-file
	    purge-deprecated-results!

	    find-problematic-packages))

;;; Commentary: Finds GitHub packages whose hash got broken.
;;; Requirements: tar and diff command line tools.

;; Workaround Geiser bug #83 (see:
;; https://github.com/jaor/geiser/issues/83)
(guix-warning-port (current-warning-port))

;;;
;;; Parameters to configure.
;;;

(define substitute-urls
  (make-parameter (cons* "https://berlin.guixsd.org"
			 "https://bayfront.guixsd.org"
			 %default-substitute-urls)))

(define results-dir
  (make-parameter (string-append (getenv "HOME") "/src/guile-hacks/fiasco")))

(define results-file
  (make-parameter (string-append (results-dir) "/results.txt")))

(define tar-diff-dir
  (make-parameter (string-append (results-dir) "/tar-diffs")))

;;;
;;; Data structures and supporting functions.
;;;

(define-record-type <result>
  (make-result package-name package-version guix-hash
	       upstream-hash hash-ok? safe-to-update? date)
  result?
  (package-name result-package-name)
  (package-version result-package-version)
  (guix-hash result-guix-hash)
  (upstream-hash result-upstream-hash)
  (hash-ok? result-hash-ok?)
  (safe-to-update? result-safe-to-update?)
  (date result-date))

(define (result->sexp result)
  (list (result-package-name result)
	(result-package-version result)
	(result-guix-hash result)
	(result-upstream-hash result)
	(result-hash-ok? result)
	(result-safe-to-update? result)
	(result-date result)))

(define (sexp->result sexp)
  (match sexp
    ((package-name package-version guix-hash
		   upstream-hash safe-to-update? result-hash-ok? date)
     (make-result package-name package-version guix-hash
		  upstream-hash safe-to-update? result-hash-ok? date))))

(define (results-file->results file)
  "Read the results from FILE and return the list of result records."
  (with-input-from-file file
    (lambda ()
      (let loop ((line (read (current-input-port))))
	(if (eof-object? line)
	    '()
	    (cons (sexp->result line)
		  (loop (read (current-input-port)))))))))

(define (result-package-exist? result)
    "Return the package referred to by RESULT or #f if it doesn't exist."
  (let* ((name (result-package-name result))
	 (version (result-package-version result))
	 (packages (find-best-packages-by-name name version)))
    (not (null? packages))))

(define (result->package result)
  "Return the package referred to by RESULT or null if it doesn't exist."
  (let* ((name (result-package-name result))
	 (version (result-package-version result))
	 (packages (find-best-packages-by-name name version)))
    (if (null? packages)
	(begin
	  (warn (format #f "The package ~a, version ~a is no longer in Guix"
			name version))
	  '())
	(first packages))))

(define (results->results-file results file)
  "Overwrite the FILE content with the RESULTS."
  (with-output-to-file file
    (lambda ()
      (for-each (lambda (result)
		  (write (result->sexp result) (current-output-port))
		  (display "\n" (current-output-port)))
		results))))

(define (result<? result1 result2)
  "Predicate to sort results alphabetically by name and versions."
  (let ((name1 (result-package-name result1))
	(name2 (result-package-name result2))
	(version1 (result-package-version result1))
	(version2 (result-package-version result2)))
  (or (string<? name1 name2)
      (and (string=? name1 name2)
	   (string<? version1 version2)))))

(define (purge-deprecated-results! file)
  "Overwrite FILE after purging the results of Guix packages no longer
available."
  (let* ((all-results (results-file->results file))
	 (valid-results
	  (sort (filter result-package-exist? all-results) result<?)))
    (results->results-file valid-results file)))



;;;
;;; Functions and procedures.
;;;

(define (package<? package1 package2)
  "Predicate to sort packages alphabetically by name and versions."
  (or (string<? (package-name package1) (package-name package2))
      (and (string=? (package-name package1) (package-name package2))
	   (string<? (package-version package1) (package-version package2)))))

(define (problematic-uri? uri)

  (define (contains-github-archive? uri)
    (regexp-match? (string-match "github.com/.*/archive/" uri)))

  ;; URI can be a string or a list of string.
  (match uri
    ((uri1 uri2 ...)			;match list of strings
     (not (null? (filter contains-github-archive? uri))))
    (uri1				;match string
     (contains-github-archive? uri1))))

(define (problematic-github-package? package)
  (let ((source (package-source package)))
    (and (origin? source)
	 (eq? (origin-method source) url-fetch)
	 (problematic-uri? (origin-uri source)))))

(define (problematic-github-packages)
  "Return the list of all the potentially problematic GitHub packages in Guix."
  (sort (fold-packages (lambda (p r)
			 (if (problematic-github-package? p)
			     (cons p r)
			     r))
		       '())
	package<?))

(define* (already-checked-packages #:optional (file (results-file)))
  "List of already checked packages."
  (if (file-exists? file)
      (filter package? (map result->package
			    (results-file->results file)))
      '()))

(define (origin->nix-base32-bash origin)
  (bytevector->nix-base32-string (origin-sha256 origin)))

(define (origin->download-uri-suffix origin)
  "Form the suffix part of the URI of a downloadable substitute file."
  (let ((file-name (origin-actual-file-name origin))
	(hash (origin->nix-base32-bash origin)))
    (string-append "/file/" file-name "/sha256/" hash)))

(define* (download-substitute package file)
  "Download the substitute of PACKAGE and return it as FILE, or #f if
the substitute could not be downloaded."
  (let* ((origin (package-source package))
	 (download-uri-suffix (origin->download-uri-suffix origin)))
    (let/ec return
      (for-each (lambda (url)
		  ;; Do not verify certificate to work around bug#28810.
		  (let* ((uri (string-append url download-uri-suffix))
			 (file (build:url-fetch uri file
						#:verify-certificate? #f)))
		    (when file
		      (return file))))	;abort loop
		(substitute-urls))
      (warn "Failed to download a substitute for package: "
	    (package-name package))
      #f)))

(define (file-hash file)
  "Return the nix-base32 string corresponding to the sha256 hash of FILE."
  (and file
       (string-trim-both (with-output-to-string
			   (lambda ()
			     (guix-hash file))))))

(define (compare-tar-archives archive1 archive2)
  "Return #f if the archives content is the same.  Otherwise, a string
detailing the differences is returned."
  (let* ((tmpdir (tmpnam))
	 (subdir1 (string-append tmpdir "/archive1"))
	 (subdir2 (string-append tmpdir "/archive2"))
	 (name1 (basename archive1))
	 (name2 (basename archive2))
	 (diff-file (string-append (tar-diff-dir) "/"
				   name1 "-" name2 ".diff")))
    (define (untar archive-file dest-dir)
      (unless (zero? (system* "tar" "-C" dest-dir "-xf" archive-file))
	(error "Failed to extract archive: " archive-file)))

    (mkdir-p subdir1)
    (mkdir-p subdir2)
    (mkdir-p (tar-diff-dir))
    (untar archive1 subdir1)
    (untar archive2 subdir2)

    ;; Use --no-dereference to prevent diff failing on broken
    ;; symlinks that archives may contain (e.g. antlr3).
    (let* ((input-pipe (open-pipe* OPEN_READ
				   "diff" "-r" "--no-dereference"
				   subdir1 subdir2))
	   (output (get-string-all input-pipe))
	   (exit-val (status:exit-val (close-pipe input-pipe))))
      (case exit-val
	((0) #f)
	((1)
	 (with-output-to-file diff-file
	   (lambda ()
	     (display output)))
	 (format #t "Diff saved to ~a:~%~a~%" diff-file output))
	(else (error "diff failed comparing the folders: " subdir1 subdir2
		     "exit status: " exit-val))))))

(define (hash-ok? hash1 hash2)
  (and (string? hash1)
       (string? hash2)
       (string=? hash1 hash2)))

(define (check-package-hash package)
  "Verify the hash of a package and return a <result> object.  Assumes
the definition of PACKAGE contains an origin using the url-fetch
method and a base32 encoded sha256 hash."
  (let* ((date (date->string (current-date)))
	 (name (package-name package))
	 (version (package-version package))
	 (origin (package-source package))
	 (tmpdir (tmpnam))
	 (tmpdir! (mkdir-p tmpdir))
	 (file-name (origin-actual-file-name origin))
	 (upstream-archive (string-append tmpdir "/upstream-" file-name))
	 (substitute-archive (string-append tmpdir "/substitute-" file-name))
	 (uri (origin-uri origin))
	 (guix-hash (origin->nix-base32-bash origin))
	 (upstream-hash (file-hash (build:url-fetch uri upstream-archive)))
	 (hash-ok? (hash-ok? upstream-hash guix-hash))
	 (substitute (and upstream-hash ;stop if false
			  (not hash-ok?)
			  (download-substitute package
					       substitute-archive)))
	 (safe-to-update?
	  (if hash-ok?
	      #f	      ;false here means 'no need to update'
	      (and substitute ;stop here if we don't have a substitute
		   (not (compare-tar-archives upstream-archive
					      substitute-archive))))))
    (make-result name version guix-hash upstream-hash hash-ok?
		 safe-to-update? date)))



;;;
;;; Main program
;;;

(define (find-problematic-packages)
  "Find and print the names of the potentially problematic GitHub packages."

  (define (print-packages packages)
    (for-each (lambda (name)
		(format #t "~a~%" name))
	      (map package-name packages))
    (format #t "~%"))

  (define (verify-package-hash package)
    (format #t "~%~a verifying package hash...~%" (package-name package))
    (let* ((result (check-package-hash package))
	   (name (result-package-name result))
	   (guix-hash (result-guix-hash result))
	   (upstream-hash (result-upstream-hash result))
	   (hash-ok? (result-hash-ok? result)))
      (format #t "~a Guix hash:     ~s~%" name guix-hash)
      (format #t "~a upstream hash: ~s~%" name upstream-hash)
      (if hash-ok?
	  (format #t "~a hash OK~%" name)
	  (format #t "~a hash NOK~%" name))
      (cond
       (hash-ok? #t)			;no-op
       ((result-safe-to-update? result)
	(format #t "~a hash can be safely updated~%" name))
       (else (format #t "~a requires manual verification~%" name)))

      ;; Append result to results file.
      (let ((results-file (open-file (results-file) "a")))
	(dynamic-wind
	  (lambda () #f)
	  (lambda ()
	    (write (result->sexp result) results-file)
	    (display "\n" results-file))
	  (lambda () (close results-file))))))

  (let* ((problematic-github-packages (problematic-github-packages))
	 (already-checked-packages (already-checked-packages)))

    (format #t "Number of potentially problematic GitHub packages: ~a~%"
	    (length problematic-github-packages))
    ;;(print-packages problematic-github-packages)

    (unless (null? already-checked-packages)
      (format #t "Skipping ~a already checked packages~%"
	      (length already-checked-packages)))

    (for-each verify-package-hash
    	      (lset-difference eq? problematic-github-packages
    			       already-checked-packages))))

